********************************************************************************
* Code used to define the DI sample and calculate proximity to the bend points *
********************************************************************************;

* NOTE: All file paths and data names that need to be inserted are identified with <>;

/* 
The base data is from the DAF2010. We asked for a limited number of variables and also asked
for the following restrictions when the files were extracted from the servers:
if BIC='A'; 		*restricted it to the primary beneficiary
if TOC_NUM=1; 		*limiting it to non-changing types of claim
if TOC1 in (5 6); 	*disability case not reduced for age
if NDOF=1;			*there is a single date of filing 

There is the Demographic File and a compilation of the Annual Files.

We also got a mortality update to 2020 to look at long-term outcomes.

We also linked the data to the Master Earnings File, which allows us to repeat the earnings 
analysis from the previous paper.
*/

*** These are the file paths;
* data    = library holding the raw extracts read by the steps below
  working = working-files library (this libref is not referenced again in this section);
libname data     "<insert file path>\data\raw2\";
libname working  "<insert file path>\data\working files\";

* Records the date and time the job started in WORK.STARTTIME (variables sdate and stime)
  and echoes both to the log. The closing step of the program reads this dataset to
  report the run time;
data starttime;
  sdate = today();
  stime = time();
  * one PUT with line pointers writes the same banner as separate PUT statements;
  put // '<<<  start time  >>>'
      /  '--------------------'
      /  'date: ' sdate date7.
      /  'time: ' stime time.
      /// ' ';
run;


*** Limiting the variables;
* Reads the demographic file and keeps only the variables used downstream.
  NOTE(review): npia, bdof1, doec, doei, rp and dualelig were missing from the KEEP list even
  though the later steps (data b, c and h) read them and this file is their only possible
  source. They are added here - confirm the names against the DAF extract;
data a;
set data.<demographic_file>;
keep 
ssn 
npia /*number of IME/PIA entries, drives the loops and the restriction in data b*/
bdof1 doec doei /*filing and eligibility dates used in data c and data h*/
rp dualelig /*race code and dual-eligibility flag used in data c and data h*/
ssieligdt ssiawarddt ssdi_onset /*date variables related to filing and eligibility*/
dobbest dobflag dodbest dodflag /*date variables related to birth and death*/
ime1-ime50 firstime npia_new npia_orig pia1-pia50 pied1-pied50 /*the AIME and PIA variables*/
judlvl1-judlvl11 rdt1-rdt11 dig1-dig5 male /*rest are for sample restrictions and heterogeneity*/  
;
run;

*** Cleaning up the AIME and PIA variables ***;
data b;
set a;
/*
Sorts out the average indexed monthly earnings (IMEn) variables. There are up to 50 slots
for the IME, and three issues:
(1) the value sometimes changes from one slot to the next;
(2) there can be several IME entries that never change, so the value is still usable;
(3) an IME slot can be empty while a later one is filled (e.g., IME2 is missing but IME3 is not).
The step counts the number of changes to the IME (variable CHANGES). Nearly all records have
IME1 correctly filled in, so IME1 is used as the IME and the first PIA calculation date
(PIED1) is extracted alongside it.
*/

* Restriction - removing the several cases with 50 or more entries. This also keeps the
  i+1 look-ahead below inside the 50-element arrays;
if not (npia<50) then delete;

* Restriction - removing observations where there is no value for the first IME entry;
if ime1=. or ime1=0 then delete;

* Work arrays: raw IME, IME with zeroes set to missing, change flags, missing flags;
array ime_raw(50) ime1-ime50;
array ime_pos(50) imeb1-imeb50; 
array chg(50) changes1-changes50;
array mis(50) missing1-missing50;

* Pass 1 - copy positive IME values, leave everything else missing, and flag the missing slots;
do i = 1 to npia; *up to number of ime entries;
	if ime_raw(i)>0 then ime_pos(i)=ime_raw(i);
	else ime_pos(i)=.;
	mis(i)=(ime_pos(i)=.);
end;

* Pass 2 - flag slots where this IME and the next one are both positive but differ;
do i = 1 to npia;
	chg(i)=(ime_pos(i)>0 & ime_pos(i+1)>0 & ime_raw(i+1)^=ime_raw(i));
end;

* Total number of changes to the IME;
changes=sum(of chg(*));

* The first IME value is ime1, the first PIA value is pia1; split the first PIA
  calculation date into year, month and a yyyymm key;
pia_year=year(pied1);
pia_month=month(pied1);
pia_yyyymm=100*pia_year + pia_month;

* Only the first 15 slots of each series are carried forward;
drop ime16-ime50 imeb16-imeb50 changes16-changes50 missing16-missing50
	pia16-pia50 piarfc2-piarfc50 pied16-pied50;
run;

*** Creating some new variables ***;
data c;
set b;

* Application dates: year, month and yyyymm key of the filing date;
applic_year=year(bdof1);
applic_month=month(bdof1);
applic_yyyymm=100*applic_year + applic_month;

* Eligibility dates: year, month and yyyymm key of doec;
elig_year=year(doec);
elig_month=month(doec);
elig_yyyymm=100*elig_year + elig_month;

* Age (in completed years) at which the application was filed;
age_filing=int((bdof1 - dobbest)/365.25);

* Race indicators for the covariates (0/1);
white=(rp=1);
black=(rp=2);
otherrace=(rp=3);

* Level at which benefits were awarded. dds is the initial level, reconsid is
  reconsideration, and if neither appears hearings is assigned on the assumption that the
  award happened at ALJ or higher. Only entries 1-10 are scanned;
* NOTE(review): judlvl11/rdt11 are kept upstream but are not checked here - confirm intent;
array lvl_(10) judlvl1-judlvl10;
array dec_(10) rdt1-rdt10;
dds=0;
reconsid=0;
do _j=1 to 10;
	if dec_(_j)='A' then do;
		if lvl_(_j)='A' then dds=1;
		else if lvl_(_j)='B' then reconsid=1;
	end;
end;
drop _j;

hearings=(dds=0 & reconsid=0);

* The new bend points seem to come into effect some time in May each year (based on looking
  at the values in the data), so a PIA year runs from June through the following May:
  months 6-12 belong to the calendar year, months 1-5 to the previous one;
if pia_month>=6 then imeyear=pia_year;
else if pia_month>=1 then imeyear=pia_year - 1;

* Restriction - PIA years 1995 through 2009 only;
if 1995<=imeyear<2010;
run;

proc sort data=c; by imeyear; 
run;

*** Creating the indexes for the National Wage Index and the CPI ***;

* The NWI is always applied 2 years after, so awi_year reflects the true NWI year
  but the year that it is applied is the imeyear. The percent cost of living increase,
  which took effect in December, is from the year after. cola_cum is the cumulative
  value of the cpi increases;

* Note, wage_index here is lagged the two years;

* The data lines are separated by blanks, not tabs: list input only treats blanks as
  delimiters, so tab characters are read as data when the program is run in batch;
data indexes; 
input imeyear awi_year wage_index cola cola_cum; 
cards;
1992 1990 21027.98 3   1.683504961
1993 1991 21811.6  2.6 1.634470836
1994 1992 22935.42 2.8 1.593051497
1995 1993 23132.67 2.6 1.549660989
1996 1994 23753.53 2.9 1.510390828
1997 1995 24705.66 2.1 1.467823934
1998 1996 25913.9  1.3 1.437633628
1999 1997 27426    2.5 1.419184233
2000 1998 28861.44 3.5 1.384569983
2001 1999 30469.84 2.6 1.337748776
2002 2000 32154.82 1.4 1.303848709
2003 2001 32921.92 2.1 1.285846853
2004 2002 33252.09 2.7 1.259399465
2005 2003 34064.95 4.1 1.226289644
2006 2004 35648.55 3.3 1.177991973
2007 2005 36952.94 2.3 1.14036009
2008 2006 38651.41 5.8 1.114721496
2009 2007 40405.48 0   1.053612
2010 2008 41334.97 0   1.053612
2011 2009 40711.61 3.6 1.053612
2012 2010 41673.83 1.7 1.017
;

*** Changing everything based on CPI and calculating predicted PIA based on bend points ***; 
data d; 
merge c(in=in_c) indexes; 
by imeyear;

* Keep sample records only. The index table also holds years with no sample records, which
  would otherwise come through as rows with every sample variable missing;
if in_c;

* STEP 1 - Calculating the errors in nominal terms;

* PIAs dated June to November are taken as they are. PIAs dated December to May are deflated
  by that year COLA, because the PIA is increased even though the AIME stays the same; 
if 6<=pia_month<=11 then pia_step=pia1; else pia_step=pia1/(1+cola/100);

* Using the original 1977 national wage index value (which applies to 1979) and the
  original bend point values to calculate the bend points for each year; 
bp1=round(180*wage_index/9779.44); * first bend point; 
bp2=round(1085*wage_index/9779.44); * second bend point;

* Estimated PIA from the three-bracket formula (90 / 32 / 15 percent); 
if ime1<=bp1 then pia_e=0.9*ime1; 
	else if bp1<ime1<=bp2 then pia_e=0.9*bp1 + 0.32*(ime1 - bp1); 
	else if ime1>bp2 then pia_e=0.9*bp1 + 0.32*(bp2 - bp1) + 0.15*(ime1 - bp2); 

* Rounding DOWN to the next lower $0.10, as intended. The previous round(pia_e, 0.1)
  rounded to the nearest dime instead. The inner round removes floating-point noise so
  that a value already on a dime is not pushed down a further $0.10;
pia_est1=floor(round(pia_e*10, 1e-6))/10; 

* calculating the error (actual minus estimated); 
	error1=pia_step - pia_est1;

* Re-centering the running variables in relation to the bend points; 
	ime_bp1_step=ime1 - bp1; 
	ime_bp2_step=ime1 - bp2;

* STEP 2 - Converting to 2020 CPI values (was originally set to 2013 values, so just use a 2020-2013 scaling factor); 
	ime=ime1*cola_cum*1.1081; 
	pia=pia_step*cola_cum*1.1081; 
	pia_est=pia_est1*cola_cum*1.1081;  
	ime_bp1=ime_bp1_step*cola_cum*1.1081; 
	ime_bp2=ime_bp2_step*cola_cum*1.1081;

* STEP 3 - Adding family maximum. The subtraction below previously contained a mis-encoded
  dash character instead of a minus sign, which SAS cannot parse;
	ime_fm_step=2257.297297; 
 	ime_fm=ime - ime_fm_step;

* Calculate error in 2020 dollars;
	error=pia - pia_est;
run;

proc sort data=d; by ssn; run;


*** Adding information on later mortality dates ***;

* Death-date extract: one ssn/dod pair per record, records without a date of death removed,
  sorted by ssn for the merge that follows;
data mort(keep=ssn dod);
	set data.<date of death2020>;
	if dod ne .;
run;

proc sort data=mort; 
	by ssn; 
run;

*Merging the two together;
* The updated date of death replaces dodbest whenever an updated date exists. The earlier
  test was on dodbest instead of dod, which (a) erased a known dodbest when the update file
  had no record and (b) never filled in deaths that occurred after the base extract;
data d;
merge d(in=in_d) mort;
by ssn;
* keep sample records only - the death file also holds people outside the sample;
if in_d;
if dod~=. then dodbest=dod;
run;


*** Adding information on when first payments are made ***;

*Taking the identifiers from the sample: one row per record of d holding ssn and a
 sample=1 marker used to filter the annual files;
data e(keep=ssn sample);
	set d;
	sample=1;
run;

*Taking the payment information from the annual files;
* Monthly variables are named <prefix>YYMM. The lists are written one year at a time
  (months 01-12) because a numbered range such as dirpay9601-dirpay9912 expands to every
  suffix in between, including month numbers 13-99 that do not exist;
data f;
set data.<annual files>;
keep ssn 
	dirpay9601-dirpay9612 dirpay9701-dirpay9712 dirpay9801-dirpay9812 dirpay9901-dirpay9912
	dirpay0001-dirpay0012 dirpay0101-dirpay0112 dirpay0201-dirpay0212 dirpay0301-dirpay0312
	dirpay0401-dirpay0412 dirpay0501-dirpay0512 dirpay0601-dirpay0612 dirpay0701-dirpay0712
	dirpay0801-dirpay0812 dirpay0901-dirpay0912 dirpay1001-dirpay1012
	payo9601-payo9612 payo9701-payo9712 payo9801-payo9812 payo9901-payo9912
	payo0001-payo0012 payo0101-payo0112 payo0201-payo0212 payo0301-payo0312
	payo0401-payo0412 payo0501-payo0512 payo0601-payo0612 payo0701-payo0712
	payo0801-payo0812 payo0901-payo0912 payo1001-payo1012
	conc9601-conc9612 conc9701-conc9712 conc9801-conc9812 conc9901-conc9912
	conc0001-conc0012 conc0101-conc0112 conc0201-conc0212 conc0301-conc0312
	conc0401-conc0412 conc0501-conc0512 conc0601-conc0612 conc0701-conc0712
	conc0801-conc0812 conc0901-conc0912 conc1001-conc1012;
run;

proc sort data=f; by ssn; run;

*Merging the two together and calculating when payments first made;

/* Helper macro: expands to the 15 yearly variable ranges <prefix>YY01-<prefix>YY12 for
   1996 through 2010, in chronological order (180 monthly variables in total).
   prefix = variable name stem, e.g. dirpay gives dirpay9601-dirpay9612 ... dirpay1001-dirpay1012 */
%macro yymm_list(prefix);
	%local yr yy;
	%do yr=1996 %to 2010;
		%let yy=%substr(&yr,3,2);
		&prefix.&yy.01-&prefix.&yy.12
	%end;
%mend yymm_list;

data g;
merge e f;
by ssn;
if sample=1;

* Monthly series, element 1 = January 1996 ... element 180 = December 2010;
array dirpay_m(180) %yymm_list(dirpay);
array payo_m(180)   %yymm_list(payo);
array conc_m(180)   %yymm_list(conc);

* startpay = yyyymm of the first month with a positive dirpay value (missing if none).
  This loop replaces the month-by-month if/else chain and returns the same value;
do _m=1 to 180;
	if dirpay_m(_m)>0 then do;
		startpay=(1996 + int((_m-1)/12))*100 + mod(_m-1, 12) + 1;
		leave;
	end;
end;

* Same for auxiliary payments: startaux = yyyymm of the first month with a positive payo value;
do _m=1 to 180;
	if payo_m(_m)>0 then do;
		startaux=(1996 + int((_m-1)/12))*100 + mod(_m-1, 12) + 1;
		leave;
	end;
end;

* Payment start dates: split yyyymm into year and month, keep the yyyymm key, then turn
  startpay into a SAS date (first day of the month);
startpay_year=int(startpay/100);
startpay_month=startpay - startpay_year*100;
startpay_yyyymm=startpay;
startpay=mdy(startpay_month, 1, startpay_year);

startaux_year=int(startaux/100);
startaux_month=startaux - startaux_year*100;
startaux_yyyymm=startaux;
startaux=mdy(startaux_month, 1, startaux_year);

* Months from the first own payment to the first auxiliary payment;
diff_start=(startaux_year-startpay_year)*12 + startaux_month - startpay_month;

* Adding an SSI identifier: ssi=1 when the conc values sum to more than zero;
concurrent=sum(of conc_m(*));
if concurrent>0 then ssi=1; else ssi=0;

* Drop the monthly inputs and the loop index. The earlier DROP list ended the dirpay range
  at dirpay1212, a typo for dirpay1012;
drop %yymm_list(dirpay) %yymm_list(payo) %yymm_list(conc) _m;
run;

proc freq data=g; tables startpay_year;
run;

*** merging SSA payment information back into the original dataset ***;
* Combines d and g by ssn, applies the sample restrictions one at a time, derives the
  family payment measure and keeps the analysis variables;
data h;
merge d g;
by ssn;

* Only records flagged as sample members by g;
if sample ne 1 then delete;

*Restriction - given we have already restricted the sample to individuals with one date of filing,
the date of initial eligibility should be the same as the date of current eligibility;
if doec ne doei then delete;

*Restriction - removing records with no sex identifier;
if male=. then delete;

*Restriction - remove individuals with imputed dates of birth (only ~0.15% of cases);
if dobflag ne 0 then delete;

*Restriction - remove the 1,000 cases with an imputed date of death;
if dodflag ne 0 then delete;

*Restriction - removing dual eligibles (i.e., eligible based on the record of another person);
if dualelig ne 0 then delete; *removing ~0.5% of sample;

*Restriction - to 4 or less changes in AIME;
if changes>4 then delete;

*Restriction - matching PIA date to date of eligibility (only 0.1% of cases where does not happen);
if pia_yyyymm ne elig_yyyymm then delete; 

*How many months date of eligibility is after date of filing;
months_elig_applic=intck('month', bdof1, doec);

*Restriction - date of eligibility should not go back further than 17 months, and is generally
not later than 12 months after date of filing;
if not (-17<=months_elig_applic<=12) then delete;

*How many months after first payment the beneficiary dies;
death_months=intck('month', startpay, dodbest);

*How many months payments begin after date of filing;
months_payment_applic=intck('month', bdof1, startpay);

*Restriction - remove people who waited a long time between filing and payment start;
if not (-99<=months_payment_applic<=48) then delete;

*Restriction - age at filing between 21 and 61;
if not (21<=age_filing<=61) then delete; 

*Family payments: start from the PIA and overwrite in the order below, so a later rule wins
 when more than one applies;
fmax=pia;
if pia<783 & ssi=1 then fmax=0.9*783; /*SSI supplements*/
if ime_bp1>90.56603774 & -1<=diff_start<=0 then fmax=0.85*ime;
if ime_fm >0 & -1<=diff_start<=0 then fmax=1.5*pia;

* NOTE(review): family_ben is listed below but is not created anywhere in the visible code;
keep 
	ssn sample
	ime ime_bp1 ime_bp2 ime_fm pia fmax diff_start family_ben
	startpay startpay_year 
	dobbest dodbest 
	age_filing	male black white otherrace
	dds reconsid hearings dig1-dig5
	ssi 
	;
run;

proc sort data=h; by ssn; run;


*** Adding earnings data ***;

* earnings data: ssn plus the annual wage variables for 1987-2013, with a constant
  sample=1 that serves as the merge key for the one-row CPI table;
data earn1(keep=ssn sample tlwage1987-tlwage2013);
	set data.<detailed_earnings_record>;
	sample=1;
run;

* adding the cpi factors: a single row keyed by sample=1 with one factor per year, 1987-2013.
  This replaces the earlier WORK.INDEXES table of the same name;
* The values are separated by blanks, not tabs: list input only treats blanks as
  delimiters, so tab characters are read as data when the program is run in batch;
data indexes;
	input sample cpi1987-cpi2013;
	cards; 
1
2.114905434 2.087764496 2.00361276 1.926550731 1.840067555 1.745794645 1.683504961 1.634470836 1.593051497
1.549660989 1.510390828 1.467823934 1.437633628 1.419184233 1.384569983 1.337748776 1.303848709 1.285846853
1.259399465 1.226289644 1.177991973 1.14036009 1.114721496 1.053612 1.053612 1.017000000 1
;

* Multiplies each annual wage by the CPI factor of the same year (wage_cpiYYYY), keeps
  ssn and the adjusted wages, and sorts by ssn for the merge with h;
data earn2;
	merge earn1 indexes;
	by sample;
	array tlwage_array(27) tlwage1987-tlwage2013;
	* cpi_array was used in the loop without being declared;
	array cpi_array(27) cpi1987-cpi2013;
	array wage_cpi_array(27) wage_cpi1987-wage_cpi2013; 

	do k=1 to 27;
		wage_cpi_array(k)=tlwage_array(k)*cpi_array(k);
	end;

	keep ssn wage_cpi1987-wage_cpi2013;
run;

* The next step merges by ssn, and nothing above guarantees that order;
proc sort data=earn2; by ssn; run;

* Attaches the CPI-adjusted earnings to the sample and extracts earnings one to four years
  after the year payments started (ep1-ep4), as far as the data (through 2013) allow;
data earn3;
	merge h earn2;
	by ssn;
	if sample=1 & startpay_year~=. & wage_cpi2013~=.;

	* array of earnings years: element 1 is 1987, element 27 is 2013;
	array te {27} wage_cpi1987-wage_cpi2013;

	* subscript of the payment start year in the array (1997 -> 11, ..., 2012 -> 26);
	if 1997<=startpay_year<=2012 then t=startpay_year - 1986;

	* earnings going forward from the start of payments; years beyond 2013 stay missing;
	array ep {4} ep1-ep4;
	if t ne . then do _j=1 to 4;
		if t + _j<=27 then ep{_j}=te{t + _j};
	end;
	drop _j;

	label ep1='Earnings at t+1'
	      ep2='Earnings at t+2'
	      ep3='Earnings at t+3'
	      ep4='Earnings at t+4';
run;

* Trims the earnings file before it is merged back onto h. The DATA statement previously
  lacked its semicolon, so SAS read SET and EARN3 as output dataset names and overwrote
  earn3 instead of reading it;
* NOTE(review): s is not created anywhere in the visible code; it is left in the DROP list
  as written;
data earn4;
	set earn3;
	drop wage_cpi1987-wage_cpi2011 s t;
run;

proc sort data=earn4; by ssn; run;

*** merging earnings back into the original dataset ***;
* All records of h are kept; records without earnings simply have missing earnings variables;
data di_file;
merge h earn4;
by ssn;
run;

* Writes the final file in Stata format. DBMS= is stated explicitly rather than left to be
  inferred from the file extension, and REPLACE lets the program be re-run when the
  output file already exists;
proc export data=di_file outfile= "<working>\di_file.dta" dbms=dta replace;
run;


/* Write the end time and the elapsed run time to the log, using the start
   date and time saved in the StartTime dataset at the top of the program. */
DATA _NULL_ ;
Set StartTime;
EDTTM = DATETIME() ;
EDATE = DATEPART(EDTTM) ;
ETIME = TIMEPART(EDTTM) ;
* Add whole days (86400 seconds each) so the run time stays positive
  when the job crosses midnight ;
RunTIME = (EDate - Sdate)*86400 + (ETime - Stime) ;
  PUT // '<<<  END TIME and RUN TIME  >>>';
  PUT    '--------------------' ;
  PUT    'TIME: ' ETime TIME.  '       Run Time: ' RunTime TIME.;
  PUT // ' ' ;
RUN ;


************************************************************************************
* Code used to define the non-DI sample and calculate proximity to the bend points *
************************************************************************************;

/* 
The base data is the Continuous Work History Sample (CWHS).

Also uses an extract of the Numident to remove individuals who have already died.

Also uses extracts of the MBR & 831 File to remove DI beneficiaries.

This calculator computes potential AIME/PIA for the working-age
population at one point in time: the start of the year held in the fdate
macro variable set below (this note originally said 2007; fdate is
currently set to 1997).

From the public site: http://www.ssa.gov/OACT/ProgData/insured.html

Disability Insured
     You have disability-insured status if you:

     1.have earned at least 20 QCs during the last 10 years, and

     2.are fully insured.

     Exceptions apply for those under age 31 and in certain other cases.

Fully Insured
     To be fully insured, you need at least one QC for each calendar year after you
     turned 21 and the earliest of the following:

     1.the year before you attain age 62,

     2.the year before you die, or

     3.the year you become disabled.

     The minimum number of QCs needed is 6. The maximum number needed is 40.
     Any year (all or part of a year) that was included in a period of
     disability is not included in determining the number of QCs you need.

Permanently Insured

     You are permanently insured if you are fully insured and you will not
     lose your fully-insured status when you stop working under covered employment.

**********************************************************************************************/ ;

/* Stack the five extract files into one work dataset: start from the first
   extract, then append the remaining four to it one after another. */
data zero;
     set data.serad110823;
run;

proc datasets;
     append base=zero data=data.serad110824;
     run;
     append base=zero data=data.serad110825;
     run;
     append base=zero data=data.serad110826;
     run;
     append base=zero data=data.serad110830;
     run;
quit;

* Calculate everything as of this year: the potential filing date ;
%let fdate=1997 ;

/* Derive age at the potential filing date (1 January of the fdate year) and
   the calendar years in which each person turns 21 and 62. */
data one;
     set zero ;

     /* a birth year is required; a missing birth month is set to June */
     if DOBYY = . then delete ;
     if DOBMM = . then DOBMM = 6 ;

     /* age in completed years, taking the 15th of the birth month as the birthday */
     filedate = MDY(1, 1, &fdate) ;
     DOB      = MDY(DOBMM, 15, DOBYY) ;
     age      = int(yrdif(DOB, filedate, 'AGE')) ;

     /* years turning specific ages */
     y_21 = DOBYY + 21 ;
     y_62 = DOBYY + 62 ;
     label y_21 = "year turned 21"
           y_62 = "year turned 62" ;

data two ;
     set one;
     * keep the working-age population only ;
     if age>=18 and age<65  ;

/*********************************************************************************************

     Define disability insured status and calculate PIA for working-age people.
     To be disability insured, one must be fully insured and either currently
     insured or blind.  Being fully insured means a worker has total quarters
     of coverage at least as great as the number of years between age 21
     and the year in which he or she turns age 62, becomes disabled, or dies.

     Example for working-age people in 2007 (the ranges shift with the
     filing year held in fdate):
           age = 18 to 64
           dob = 1989 to 1943
           y_18= 2007 to 1961

     Therefore, we do not need to worry about quarters as defined previous to 1953
     for this application. And, attaining age 21 will always be later than 1950.

**********************************************************************************************/

     array wqc{1951:2010} wqc1951-wqc2010 ;
     array q{1951:2010} q1951-q2010 ;
     badvar=0;

     * Quarters of coverage: translate the yearly pattern code in wqc into a
       count of quarters (0 to 4). Any other code is flagged with badvar=1 and
       given the sentinel -999. The flag and the sentinel must be two separate
       assignments: written as "q{i}=-999 and badvar=1" the statement is one
       boolean expression that stores 0 in q{i} and never sets badvar ;
     do i = 1951 to 2010 ;
           if wqc{i}=0         then q{i}=0 ;
           else if wqc{i}=1000 then q{i}=1 ;
           else if wqc{i}=1100 then q{i}=2 ;
           else if wqc{i}=1110 then q{i}=3 ;
           else if wqc{i}=1111 then q{i}=4 ;
           else do;
                q{i}=-999 ;
                badvar=1 ;
           end;
     end;

     * total quarters of coverage from 1951 through the filing year ;
     quarters=0;
     do i = 1951 to &fdate ;
           quarters=quarters + q{i} ;
     end;

/*************************************************************************

     From POMS RS 00301.105 Fully Insured Status

     Number of QCs Under the 1-for-4 Rule:

     A fully insured NH (number holder) has at least one QC (whenever acquired)
     for each calendar year after 1950 or after the year in which the person
     attained age 21, if later, up to the year in which the NH attains age 62.

     Note: this contradicts the public info which says 'the year before
     turning age 62'. I'm going with this one (POMS).

*************************************************************************/

     * Fully insured: at least 6 quarters overall, and either 40 quarters or
       one quarter per year counted from the year of turning 21 ;
     if quarters>=6 and (quarters>= 40 | quarters>=min(y_62-y_21,&fdate-y_21))
           then fullins=1;
           else fullins=0;
           label fullins="Fully insured status" ;

  /*************************************************************

     People under age 24 need 6 quarters of coverage in the last
     twelve quarters preceding disability onset.

  *************************************************************/

     if age<24 then do;
           qc=sum(q{&fdate-1},q{&fdate-2},q{&fdate-3}) ;
           curins=(qc>=6) ;
           end;

  /*************************************************************

     Ages 24 to 30 need half of the number of quarters
      between age 21 & onset. Number of quarters is (age-21)*4.

     Exactly half is enough, so the test is >= rather than >.

  *************************************************************/

     else if (24<=age<=30) then do;
           qc=0;
           do i=1 to age-21 ;
                qc=qc+q{&fdate-i} ;
           end;
           curins=(qc>=(age-21)*4/2) ;
           end;

  /*************************************************************

     Ages>=31 need 20 quarters in the last 40 quarters.

     The rule is "at least 20", so exactly 20 quarters qualifies
     and the test is >= rather than >.

  *************************************************************/

     else if age>=31 then do;
           qc=0;
           do i=1 to 10 ;
                qc=qc+q{&fdate-i}  ;
           end;
           curins=(qc>=20) ;
           end;

  /*************************************************************

     Disability insured: both the recent-work test (curins) and
     the fully insured test (fullins) must be met.

  *************************************************************/

     disins=(curins=1 and fullins=1)  ;

  /*************************************************************

     Now, on to calculating the benefit amount. First, calculate
     computation years.  

     For disability, the number of years of earnings used equals
     the number of years elapsed after 1950 (or year attained
     age 21, if later) and before year of onset, minus dropout
     years equal to one-fifth of the number of number of elapsed
     years rounded to the next lower integer (to a maximum of 5).

     See Para703, page 151, of Social Security Handbook (2001).

  *************************************************************/

     /* elapsed: years strictly between the year of turning 21 and the earlier
        of the filing year and the year of turning 62.
        comp_yrs: elapsed years minus dropout years (one fifth of elapsed,
        truncated, at most 5), with a floor of 2 computation years. */
     elapsed= min(&fdate,y_62) - y_21 - 1 ;
     comp_yrs=max(2,(elapsed - min(5,int((elapsed/5))))) ;

   /* Annual earnings by calendar year, read from the input variables e1951-e2010 */
   array anulearn{1951:2010} e1951-e2010;

   array idxe{1951:2010} _temporary_ ; /*Indexed annual earnings */

   /* Wage series used to index earnings below, one value per year 1951-2011.
      NOTE(review): presumably the SSA national average wage index - confirm
      the values against the published table. */
   array avanwage{1951:2011} _temporary_
      (2799.16 2973.32 3139.44 3155.64 3301.44
       3532.36 3641.72 3673.80 3855.80 4007.12
       4086.76 4291.40 4396.64 4576.32 4658.72
       4938.36 5213.44 5571.76 5893.76 6186.24
       6497.08 7133.80 7580.16 8030.76 8630.92
       9226.48 9779.44 10556.03 11479.46 12513.46
       13773.10 14531.34 15239.24 16135.07 16822.51
       17321.82 18426.51 19334.04 20099.55 21027.98
       21811.60 22935.42 23132.67 23753.53 24705.66
       25913.90 27426.00 28861.44 30469.84 32154.82
       32921.92 33252.09 34064.95 35648.55 36952.94 38651.41
       40405.48 41334.97 40711.61 41673.83 42979.61);

   /* One value per year 1951-2013. This array is not referenced in the rest
      of this step as shown; the indexing loop only notes that capping would go
      there. NOTE(review): presumably the taxable maximum - confirm. */
   array tax_max{1951:2013} _temporary_
      (3600 3600 3600 3600 4200
      4200 4200 4200 4800 4800
      4800 4800 4800 4800 4800
      6600 6600 7800 7800 7800
      7800 9000 10800 13200 14100
      15300 16500 17700 22900 25900
      29700 32400 35700 37800 39600
      42000 43800 45000 48000 51300
      53400 55500 57600 60600 61200
      62700 65400 68400 72600 76200
      80400 84900 87000 87900 90000
      94200 97500 102000 106800 106800
      106800 110100 113700  );

   * Bendpoint one (by year, 1979-2013): upper limit of the 90 percent band of the PIA formula below;
  array bend1{1979:2013} _temporary_
      (180 194 211 230 254 267
      280 297 310 319 339
      356 370 387 401 422
      426 437 455 477 505
      531 561 592 606 612
      627 656 680 711 744
      761 749 767 791);

   * Bendpoint two (by year, 1979-2013): upper limit of the 32 percent band of the PIA formula below;
   array bend2{1979:2013} _temporary_
      (1085 1171 1274 1388 1528 1612
       1691 1790 1866 1922 2044
       2145 2230 2333 2420 2545
       2567 2635 2741 2875 3043
       3202 3381 3567 3653 3689
       3779 3955 4100 4288 4483
       4586 4517 4624 4768);

   ****************for special  minimum*********************************;

   /* Yearly earnings threshold, 1951-2012: a year is counted toward the special
      minimum when annual earnings exceed this amount (see the spc_yrs loop). */
   array mc{1951:2012} _temporary_
      (900 900 900 900 1050 1050 1050 1050 1200 1200 1200 1200
      1200 1200 1200 1650 1650 1950 1950 1950 1950 2250 2700
      3300 3525 3825 4125 4425 4725 5100 5500 6075 6675 7050
      7425 7825 8175 8400 8925 9525 5940 6210
      6435 6750 6795 6975 7290 7605 8055 8505
      8955 9450 9675 9765 10035 10485 10890 11385 11880 11880
      11880 12285);

   /* Yearly multiplier, 1973-2012: the value for the year before the filing
      year is multiplied by the credited years to give the special minimum PIA */
   array mul_amt{1973:2012} _temporary_
      ( 8.5 9 9 9 9 9 11.50
        12.64 14.45 16.07 17.26 17.86 18.49 19.06 19.31 20.12 20.92
        21.90 23.08 23.93 24.65 25.29 26.00 26.68 27.45 28.03 28.39
        29.10 30.12 30.90 31.33 31.99 32.85 34.20 35.33 36.14 38.24
        38.24 38.24 39.62 );

   /***********************************************************************

   Indexing and sorting lifetime annual earnings:

   Indexed earnings for a given year equal actual creditable earnings
   multiplied by the national average wage for the second year before the
   worker becomes disabled (or reaches age 62 or dies) divided by the national
   average wage for the given year, except that for years after the second
   year before disablement, indexed earnings equal actual creditable earnings.

   (Annual Statistical Supplement Table 2.A8)

   ************************************************************************/    

     /* Index each year of earnings to the wage level two years before the
        filing year; the last two years enter at their nominal value. */
     do i = 1951 to &fdate ;
           * if earnings are not capped at the tax max, ;
           * you would need to do that here.            ;
           if i > &fdate-2 then
                idxe{i}=anulearn{i} ;
           else
                idxe{i}=anulearn{i}*(avanwage{&fdate - 2} / avanwage{i}) ;
     end;

     /* Exchange sort into ascending order, so the highest indexed earnings
        end up in the last elements (those nearest the filing year). */
     do i = 1951 to &fdate-1;
        do j = i + 1 to &fdate;
           if idxe{j} < idxe{i} then do;
              tempp=idxe{i};
              idxe{i}=idxe{j};
              idxe{j}=tempp;
           end;
        end;
     end;

     /* Add up the comp_yrs highest values; missing values are ignored. */
     sum_earn=0;
     do i = &fdate - comp_yrs + 1 to &fdate;
        sum_earn=sum(sum_earn, idxe{i});
     end;

     /* AIME: monthly average over the computation years,
        truncated to the next lower 10 cents */
     aime=int((sum_earn/(comp_yrs*12))*10)/10;

   /* PIA from the three-band formula at the filing-year bend points:
      90 percent up to bend point one, 32 percent between the two bend
      points, and 15 percent above bend point two. */
      if aime <= bend1{&fdate} then
         pia_est=.9*aime;
      else if aime <= bend2{&fdate} then
         pia_est=.9*bend1{&fdate}
                       + .32*(aime-bend1{&fdate});
      else
         pia_est=.9*bend1{&fdate}
                    + .32*(bend2{&fdate}- bend1{&fdate})
                    + .15*(aime-bend2{&fdate});

   /* Special minimum PIA - should not apply to many, but we may wish to
      exclude those cases (spec_min=1). First count the years of coverage for
      the special minimum PIA: years whose earnings exceed the mc threshold.
      See http://www.ssa.gov/oact/COLA/yoc.html */
      spc_yrs=0;
      do i=1951 to &fdate;
         spc_yrs=spc_yrs + (anulearn[i] > mc[i]);
      end;

   /* This next part loosely approximates the tables here:
      http://www.ssa.gov/oact/ProgData/tableForm.html
      Only years beyond the tenth are credited, up to 30 credited years. */
      spc_yrs=min(30, max(0, spc_yrs-10));
      mult_f=mul_amt{&fdate - 1};
      spia_est=int((spc_yrs*mult_f*10))/10;

   /* Final PIA estimate: the larger of the regular and special minimum PIA,
      with a flag for the cases where the special minimum is the larger one. */
      fpia_est=max(spia_est,pia_est);
      spec_min=(spia_est>pia_est) ;
      label fpia_est="Final PIA estimate"
            spec_min="Spec min PIA applies" ;

* output a file of AIMEs for the DI insured ;

/* Analysis extract: the disability-insured records from dataset two,
   restricted to the variables needed downstream and sorted by ssn. */
data cwhs;
     set two(keep= ssn mi dobmm dobyy race sex dod ed bs bt ds dt cd sei mil agi sbi finp fye lye ub
                   filedate dob age y_21 y_62 spia_est pia_est fpia_est spec_min aime elapsed comp_yrs
                   disins curins fullins
             where=(disins=1)) ;
run;

proc sort data=cwhs;
     by ssn;
run;


*** Remove people who died before the potential claiming year;
/* Numident extract: keep the records with entry code T and convert the
   year-of-death text field to a number. */
data ein;
     set data.cwhs09_numident;
     if entry_code ne 'T' then delete;
     ydeath=input(dod_yy,4.);
run;

proc sort data=ein;
     by ssn;
run;

/* One record per ssn: the first one in sorted order. */
data numi (keep= ssn ydeath dod_yy dod_mm dod_dd);
     set ein;
     by ssn;
     if first.ssn;
run;


/******************************************************************************

Remove people previously entitled to DI:
     primary claim: {BIC_EIP,BIC_ECP}=A, and            [character here]
     disabled worker claim {TOB_EIP,TOB_ECP}=2.         [numeric]

******************************************************************************/

/* MBR extract: primary (BIC = A) disabled-worker (TOB = 2) claims.
   ent_yr is the entitlement year, taken from the first of the two
   BIC/TOB/year field sets (the _eip set, then the _ecp set) that
   has a plausible year (1901-2049). */
data zwei (keep= ssn ent_yr bic_eip bic_ecp tob_eip tob_ecp
                 doeiyy_p doecyy_p dodpmm_p dodpyy_p);
     set data.cwhs09_mbr ;

     /* need a primary code and a disabled-worker code somewhere on the record */
     if (bic_eip='A' or bic_ecp='A') and (tob_eip=2 or tob_ecp=2) ;

     if bic_eip='A' and tob_eip=2 and (1900 < doeiyy_p < 2050)
          then ent_yr = doeiyy_p;
     else if bic_ecp='A' and tob_ecp=2 and (1900 < doecyy_p < 2050)
          then ent_yr = doecyy_p;

     ssn=ssn_p;
run;

/* Keep the claims with a usable entitlement year, then reduce to one record
   per ssn: the one with the earliest entitlement year. */
data drei;
     set zwei;
     if 1900 < ent_yr < 2050 ;
run;

proc sort data=drei;
     by ssn ent_yr ;
run;

data mbr;
     set drei;
     by ssn;
     if first.ssn ;
run;

/******************************************************************************

Merge and restrict sample:
     1. CWHS sample
     2. no previous disability onset, or onset after analysis year
     3. no date of death or death after analysis year

******************************************************************************/

/* CWHS sample with the MBR entitlement year and Numident death year attached.
   A record stays only if it is in the CWHS extract, has no entitlement year
   before the analysis year, and has no death year before the analysis year. */
data all;
     merge cwhs(in=in1) mbr numi;
     by ssn;
     if in1
        and (ent_yr=. or ent_yr>=&fdate)
        and (ydeath=. or ydeath>=&fdate) ;
     age_cwhs=age;
     year_calendar= &fdate ;
run;

proc sort data=all;
     by ssn;
run;

/* Sort the 831 file by ssn in place; it is merged by ssn further down. */
proc sort data=working.initial831;
     by ssn;
run;

/* Diagnostic tabulations, missing values included: entitlement and death
   years left in the selected sample, the bad-code flag, insured status
   before and after selection, and the special minimum flag. */
proc freq data=all;
     title "CWHS &fdate sample: entitlement years after removing previous DI" ;
     tables ent_yr / missing;
run;

proc freq data=all;
     title "CWHS &fdate sample: death years after removing previous deaths" ;
     tables ydeath / missing;
run;

proc freq data=two;
     title "CWHS &fdate: badvar needs to be zero for all observations" ;
     tables badvar / missing;
run;

proc freq data=two;
     title "Full CWHS &fdate sample: Fully insured" ;
     tables disins / missing;
run;

proc freq data=all;
     title "CWHS &fdate sample after selection: Fully insured" ;
     tables disins / missing;
run;

proc freq data=two;
     title "CWHS &fdate.: Special minimum PIA applies" ;
     tables spec_min / missing;
run;

/* From the 831 file keep the ssn, the FLD date and the calendar year of FLD. */
data d831 (keep= ssn year_claim FLD);
     set working.initial831;
     year_claim=year(FLD);
run;

/*****************************************************
     Output the cleaned file
*****************************************************/ ;

/* Final non-DI file: every record in the selected CWHS sample, with the 831
   claim fields attached where the ssn matches, written out as a .dta file.
   NOTE(review): if d831 holds several records for one ssn, that person will
   appear once per record - confirm this is intended. */
data pop2005;
     merge all(in=inCWHS) d831(in=in831);
     by ssn;
     if inCWHS;
run;

proc export
     data    = pop2005
     outfile = "<working>\nondi_file.dta";
run;


 
